* Note: this analysis requires structural changes to the build data

* (2) Prepare the data set for analysis ----------------------------------------

	* Load the data (path supplied by the global output1)
	use $output1, clear
	
	* Switch to the appropriate dates: copy the columns of the chosen
	* specification (global spec) into the generic names used further down
	gen pre 				= pre_${spec}
	gen post 				= post_${spec}
	gen training 		 	= training_${spec}
	gen training_started 	= ${spec}_training_started
	gen training_ended	 	= ${spec}_training_ended
	
	* Sample restriction: keep only months between the globals prestart and
	* postend (inclusive). The if qualifier is required; without it Stata reads
	* inrange(...) as a variable list and stops with a syntax error.
	keep if inrange(year_month, $prestart, $postend)
	
	* List of variables to be included in PRS calculations.
	* Descriptions given by the original author: warrants, vehicle recoveries,
	* firearm violations, traffic stops, driver stops, ANOVs, hazardous
	* citations, non-hazardous citations, curfew violations, parking citations,
	* ISRs. NOTE(review): cont and cta are not described anywhere in this file;
	* confirm their meaning against the build code.
	gl prs_items 	warrant veh_rec firearm traf_stop driv_stop cont anov ///
					haz_cit nonhaz_cit curf_vio cta park_cit isrs
	
	* Same list with the prs_ prefix attached to every item
	gl prs_items_1 
	foreach item in $prs_items { 
		gl prs_items_1 $prs_items_1 prs_`item'
	}

* (2) Identifying pre and post period ------------------------------------------
		
	* Relative time to training period (pte = "post training ended")
	*
	*   training == 1                    ->  0
	*   year_month before training start ->  -1, -2, ...
	*   year_month after training end    ->  +1, +2, ...
	*   anything else                    ->  missing
	*
	* The code splits year_month, training_started and training_ended with
	* floor(x/100) and mod(x,100), i.e. it treats them as year*100 + month.
	* Each value is turned into a running month count (12*year + month) and
	* the two counts are differenced. The "before" branch is tested first, so
	* it takes precedence for a row that satisfies both date tests.

		gen month_pte = cond(training == 1, 0, ///
			cond(year_month < training_started, ///
				(12*floor(year_month/100) + mod(year_month, 100)) ///
				- (12*floor(training_started/100) + mod(training_started, 100)), ///
			cond(year_month > training_ended, ///
				(12*floor(year_month/100) + mod(year_month, 100)) ///
				- (12*floor(training_ended/100) + mod(training_ended, 100)), ///
				.)))
			
* (3) Index variables ----------------------------------------------------------
		
	* Month variable: working copy of the relative month
	generate month = month_pte 
							
	* Individual months: one 0/1 indicator per relative month
	
		* 25 months before training (months_n_1 ... months_n_25)
		forvalues lag = 1/25 {
			generate months_n_`lag' = (month == -`lag')
		}
		
		* 18 months after training (months_1 ... months_18)
		forvalues lead = 1/18 {
			generate months_`lead' = (month == `lead')
		}
			
		* Periods for indexing: -25 ... -1 followed by 1 ... 18 (0 is left out)
		global pp
		forvalues lag = 25(-1)1 {
			global pp $pp -`lag'
		}
		forvalues lead = 1/18 {
			global pp $pp `lead'
		}
		display "$pp"
							
	* Create Index
	* For every relative month listed in the global pp, the data are cut down
	* to that month, each PRS item and arrest_non_con_cop are passed to z_cal,
	* the resulting z_ variables are averaged row-wise, and the period-specific
	* columns are saved to a temporary file keyed by employee_id and year_month.
	* NOTE(review): z_cal is defined outside this file; the code below relies
	* on it creating a variable named z_ followed by the input name - confirm.
		
		foreach p of global pp {
			
			preserve
				
					* Period to be evaluated
					display "`p'"
					
					* Suffix used in variable and tempfile names:
					* negative months become n_ plus the absolute value
					local tag `p'
					if `p' < 0 {
						local tag n_`=abs(`p')'
					}

					* Restrict the data set to the given period
					keep if months_`tag' == 1
					
						* (1) PRS outcomes
						foreach item of global prs_items {
							z_cal prs_`item'
						}
						
							* PRS score without arrests
							egen prs_score = rowmean(z_prs*)
							
							* PRS score with arrest
							z_cal arrest_non_con_cop
							egen prs_score_w_arrs = rowmean(z_prs* z_arrest_non_con_cop)
												
						* (2) Attach the period suffix to every created variable
						rename prs_score            prs_score_`tag'
						rename prs_score_w_arrs     prs_score_w_arrs_`tag'
						rename z_arrest_non_con_cop z_arrest_non_con_cop_`tag'
						
						foreach item of global prs_items {
							rename z_prs_`item' z_prs_`item'_`tag'
						}
				   
						* (3) Clean up: identifiers plus the period columns only
						keep employee_id year_month ///
							prs_score_`tag' prs_score_w_arrs_`tag' ///
							z_prs_*_`tag' z_arrest_non_con_cop_`tag'
						
					* One temporary file per period, read again when merging
					tempfile score_`tag'
					save `score_`tag''

			restore
		}
	

		* Merging back index data 
		* Each period file adds its period-specific columns to the full data;
		* the with-arrest score belonging to an observation's own relative
		* month is then collected in prs_score_w_arrs_cut_1m.
		
		generate prs_score_w_arrs_cut_1m = . 
		foreach p of global pp {
				
			* Suffix used in variable and tempfile names:
			* negative months become n_ plus the absolute value
			local tag `p'
			if `p' < 0 {
				local tag n_`=abs(`p')'
			}

			merge 1:1 employee_id year_month using `score_`tag'', nogenerate
			replace prs_score_w_arrs_cut_1m = prs_score_w_arrs_`tag' ///
				if months_`tag' == 1
		}
		
	
* (4) Data preparation ---------------------------------------------------------
	
	* Renaming outcomes to the short labels used in the result matrices
	rename 	(trr_nf_p2020_1or2_C arrest_con_cop days_iod prs_score_w_arrs_cut_1m) ///
			(NLU DAR OI AI1m)

	* Outcomes
	global outcomes NLU DAR OI AI1m
	
	* Post-training indicator and its interaction with treatment.
	* Stata ranks a missing value above every number, so (month_pte>0) on its
	* own would flag rows with an undefined month_pte as post-training. Those
	* rows are left missing instead; treat_post is then missing for them too.
	gen post_training = (month_pte>0) if !missing(month_pte)
	gen treat_post = treatment * post_training
	
	* Indicator of data to model
	*   main_model    : months 1 to 4 after training
	*   dif_dif_model : months -25 to -1 plus months 1 to 4 (month 0 excluded)
	* inrange() returns 0 for a missing month_pte, so such rows enter neither.
	gen main_model = (inrange(month_pte,1,4))
	gen dif_dif_model = (inrange(month_pte, -25, -1) | inrange(month_pte, 1, 4) )
						
	* Covariates 
	*   socio_covars : controls of the dif-in-dif specification
	*   picked5a     : same controls plus three pre-period (pre_) variables,
	*                  used by the main specification
	global socio_covars experience dblack dwhite dhispanic dmale
	gl picked5a	pre_arrest_con_cop pre_prs_score_w_arrs pre_days_iod ///
				experience dblack dwhite dhispanic dmale 
					
		
						
* (4) SUEST estimation ------------------------------------------------------

	* Use main admin data to pull baseline covars:
	* the file named by the global interim_data is reduced to its month == 1
	* rows and to the three pre_ variables, stored in a temporary file, and
	* attached to the working data by employee_id.
	preserve
	
		use $interim_data, clear
		keep if month == 1
		keep employee_id pre_arrest_con_cop pre_prs_score_w_arrs pre_days_iod
		
		tempfile main_with_controls
		save `main_with_controls'
		
	restore 
	
	* Only observations found in both data sets are retained
	merge m:1 employee_id using `main_with_controls', nogenerate keep(match)
		
		
	* SUEST results 
	* For every outcome two specifications are fitted:
	*   main    : outcome on treatment and the picked5a controls (main_model rows)
	*   dif-dif : outcome on post_training, treatment, treat_post and the
	*             socio_covars controls (dif_dif_model rows)
	* First each is run separately with standard errors clustered on
	* employee_id, then both are combined with suest to test whether the
	* treat_post and treatment coefficients are equal.
	
		* Result holders; row 1 of each is an all-missing placeholder
		*   original  : control mean, coef, s.e., p for main, then for dif-dif
		*   container : same layout from suest, plus the p-value of the
		*               equality test in the last column
		matrix container = J(1, 9, .)
		matrix original  = J(1, 8, .)
		
		* Right-hand sides shared by all outcomes
		local rhs_main treatment $picked5a i.strata ib202004.year_month
		local rhs_did  post_training treatment treat_post ///
						$socio_covars i.strata ib202004.year_month
		
		foreach y of global outcomes {
			
			* Control-group mean on each estimation sample
			quietly summarize `y' if treatment == 0 & dif_dif_model == 1
			local cm_did = r(mean)
			
			quietly summarize `y' if treatment == 0 & main_model == 1
			local cm_main = r(mean)
			
			
			* Traditional reporting block (clustered standard errors)
			quietly regress `y' `rhs_did' if dif_dif_model == 1, ///
				vce(cluster employee_id)
			quietly test treat_post
			matrix o2 = `cm_did', _b[treat_post], _se[treat_post], r(p)
			
			quietly regress `y' `rhs_main' if main_model == 1, ///
				vce(cluster employee_id)
			quietly test treatment
			matrix o1 = `cm_main', _b[treatment], _se[treatment], r(p)
				
			matrix add = o1, o2
			matrix original = original \ add
	 
	 
			* SUEST block: the models are refitted without a vce() option and
			* stored; clustering is requested in the suest call instead
			quietly regress `y' `rhs_main' if main_model == 1
			estimates store main_ols 
			
			quietly regress `y' `rhs_did' if dif_dif_model == 1
			estimates store dif_dif 

			suest dif_dif main_ols, vce(cluster employee_id)
	
			* Store coefficient, standard error and p-value of each model
			test [main_ols_mean]treatment
			matrix p1 = _b[main_ols_mean:treatment], ///
						_se[main_ols_mean:treatment], ///
						r(p)
			test [dif_dif_mean]treat_post
			matrix p2 = _b[dif_dif_mean:treat_post], ///
						_se[dif_dif_mean:treat_post], ///
						r(p)
		
			* Testing equality of the two coefficients
			display "`y'"
			test [dif_dif_mean]treat_post = [main_ols_mean]treatment
			matrix results = `cm_main', p1, `cm_did', p2, r(p)
			matrix container = container \ results
		}
	
	matrix list container, noheader nohalf format(%9.0g) title("") 
	matlist container 
	matlist original
	

* (5) Tables for reviewers -----------------------------------------------------
					
	* Matrix container: one row per outcome is appended below; row 1 is an
	* all-missing placeholder. Columns: three control means, then coefficient,
	* standard error and p-value for post_training, treatment and treat_post,
	* then the number of observations.
	mat mat_dif_dif = J(1,13, .)
						
	* Running regression 
	foreach outcome in $outcomes {

		* Multiplier: means, coefficients and standard errors of NLU and DAR
		* are scaled by 1000; other outcomes are reported as they are
		local mul = 1
		if inlist("`outcome'", "NLU", "DAR") {
			local mul = 1000
		}
	
		* CM overall (control group, dif-in-dif sample)
		qui: sum `outcome' if treatment==0 & dif_dif_model==1 
		local CM = r(mean)
			
		* CM pre period
		qui: sum `outcome' if treatment==0 & month_pte<0 & dif_dif_model==1 
		local CM_pre = r(mean)

		* CM post period; written as month_pte>0 to match post_training
		* (the dif-in-dif sample contains no month 0, so the rows are the same)
		qui: sum `outcome' if treatment==0 & month_pte>0 & dif_dif_model==1 
		local CM_post = r(mean)

		* Estimation
		qui: reg `outcome' post_training treatment treat_post $socio_covars ///
				i.strata i.year_month if dif_dif_model==1, ///
				vce(cluster employee_id) 
					
		* Store; p-values are not scaled
		qui: test post_training
		mat P1 = 	`CM' * `mul', `CM_pre' * `mul', `CM_post' * `mul', ///
					_b[post_training] * `mul', _se[post_training] * `mul', ///
					r(p)

		qui: test treatment
		mat P2 = _b[treatment] * `mul', _se[treatment] * `mul', r(p)
	
		qui: test treat_post
		mat P3 = _b[treat_post] * `mul', _se[treat_post] * `mul', r(p), e(N)

		mat result = P1, P2, P3
		mat rownames result = "`outcome'"
		mat mat_dif_dif = mat_dif_dif \ result
	}

	* Distinct labels per regressor so that the three coefficient groups can be
	* told apart in the printed table
	mat colnames mat_dif_dif = "CM" "CM_pre" "CM_post" ///
							"post_b" "post_se" "post_p" ///
							"treat_b" "treat_se" "treat_p" ///
							"did_b" "did_se" "did_p" "N" 
	matlist mat_dif_dif
	